An Advanced Introduction to R

Kazuharu Yanagimoto

January 13, 2023

Project Based Workflow

Q. Why Doesn’t Your Code Work on My Computer?

A. Conflicts in Path or Package Version

A. You don’t use here and renv

R Project

Have you ever clicked this button?


You should ALWAYS use R Project!

Why Do We Need to Use R Project?


Path Manager

Package Manager

Always Use here for Paths


The function here::here() treats the project directory as the root directory.

here::here()
[1] "/home/rstudio/workshop-r-2022"


You should always specify the path by here::here()

data <- readr::read_csv(
  here::here("data/tiny.csv")
)


It works in Windows, Mac, Linux (of course, in a Docker environment)

Remember…

If the first line of your R script is setwd("C:\Users\jenny\path\that\only\I\have")

I* will come into your office and SET YOUR COMPUTER ON FIRE 🔥.

–Jenny Bryan

renv Is Smarter than Us


  • Initialize the environment with renv::init(). It creates the renv/ directory and the renv.lock file
  • At some point, you can record your package and its version information with renv::snapshot()
  • Your collaborators can install the same packages just by running renv::restore()
renv.lock
{
  "R": {
    "Version": "4.2.2",
    "Repositories": [
      {
        "Name": "CRAN",
        "URL": "https://packagemanager.posit.co/cran/latest"
      }
    ]
  },
  "Packages": {
    "DBI": {
      "Package": "DBI",
      "Version": "1.1.3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "b2866e62bab9378c3cc9476a1954226b",
      "Requirements": []
    },
    "MASS": {
      "Package": "MASS",
      "Version": "7.3-58.1",
      "Source": "Repository",
      "Repository": "CRAN",
      "Hash": "762e1804143a332333c054759f89a706",
      "Requirements": []
    },
    "Matrix": {
      "Package": "Matrix",
      "Version": "1.5-1",
      "Source": "Repository",
      "Repository": "CRAN",
      "Hash": "539dc0c0c05636812f1080f473d2c177",
      "Requirements": [
        "lattice"
      ]
    },
    "R6": {
      "Package": "R6",
      "Version": "2.5.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "470851b6d5d0ac559e9d01bb352b4021",
      "Requirements": []
    },
    "RColorBrewer": {
      "Package": "RColorBrewer",
      "Version": "1.1-3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "45f0398006e83a5b10b72a90663d8d8c",
      "Requirements": []
    },
    "askpass": {
      "Package": "askpass",
      "Version": "1.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "e8a22846fff485f0be3770c2da758713",
      "Requirements": [
        "sys"
      ]
    },
    "assertthat": {
      "Package": "assertthat",
      "Version": "0.2.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "50c838a310445e954bc13f26f26a6ecf",
      "Requirements": []
    },
    "backports": {
      "Package": "backports",
      "Version": "1.4.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "c39fbec8a30d23e721980b8afb31984c",
      "Requirements": []
    },
    "base64enc": {
      "Package": "base64enc",
      "Version": "0.1-3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "543776ae6848fde2f48ff3816d0628bc",
      "Requirements": []
    },
    "bit": {
      "Package": "bit",
      "Version": "4.0.5",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "d242abec29412ce988848d0294b208fd",
      "Requirements": []
    },
    "bit64": {
      "Package": "bit64",
      "Version": "4.0.5",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "9fe98599ca456d6552421db0d6772d8f",
      "Requirements": [
        "bit"
      ]
    },
    "blob": {
      "Package": "blob",
      "Version": "1.2.3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "10d231579bc9c06ab1c320618808d4ff",
      "Requirements": [
        "rlang",
        "vctrs"
      ]
    },
    "broom": {
      "Package": "broom",
      "Version": "1.0.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "1773f8d5102f9853ecd18a0d13d460fd",
      "Requirements": [
        "backports",
        "dplyr",
        "ellipsis",
        "generics",
        "glue",
        "purrr",
        "rlang",
        "stringr",
        "tibble",
        "tidyr"
      ]
    },
    "bslib": {
      "Package": "bslib",
      "Version": "0.4.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "a7fbf03946ad741129dc81098722fca1",
      "Requirements": [
        "base64enc",
        "cachem",
        "htmltools",
        "jquerylib",
        "jsonlite",
        "memoise",
        "mime",
        "rlang",
        "sass"
      ]
    },
    "cachem": {
      "Package": "cachem",
      "Version": "1.0.6",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "648c5b3d71e6a37e3043617489a0a0e9",
      "Requirements": [
        "fastmap",
        "rlang"
      ]
    },
    "callr": {
      "Package": "callr",
      "Version": "3.7.3",
      "Source": "Repository",
      "Repository": "CRAN",
      "Hash": "9b2191ede20fa29828139b9900922e51",
      "Requirements": [
        "R6",
        "processx"
      ]
    },
    "cellranger": {
      "Package": "cellranger",
      "Version": "1.1.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "f61dbaec772ccd2e17705c1e872e9e7c",
      "Requirements": [
        "rematch",
        "tibble"
      ]
    },
    "cli": {
      "Package": "cli",
      "Version": "3.5.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "eb9fc121ad9a1075c471107ef185be46",
      "Requirements": []
    },
    "clipr": {
      "Package": "clipr",
      "Version": "0.8.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "3f038e5ac7f41d4ac41ce658c85e3042",
      "Requirements": []
    },
    "colorspace": {
      "Package": "colorspace",
      "Version": "2.0-3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "bb4341986bc8b914f0f0acf2e4a3f2f7",
      "Requirements": []
    },
    "cpp11": {
      "Package": "cpp11",
      "Version": "0.4.3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "ed588261931ee3be2c700d22e94a29ab",
      "Requirements": []
    },
    "crayon": {
      "Package": "crayon",
      "Version": "1.5.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "e8a1e41acf02548751f45c718d55aa6a",
      "Requirements": []
    },
    "curl": {
      "Package": "curl",
      "Version": "4.3.3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "0eb86baa62f06e8855258fa5a8048667",
      "Requirements": []
    },
    "data.table": {
      "Package": "data.table",
      "Version": "1.14.6",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "aecef50008ea7b57c76f1cb5c127fb02",
      "Requirements": []
    },
    "dbplyr": {
      "Package": "dbplyr",
      "Version": "2.2.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "f6c7eb9617e4d2a86bb7182fff99c805",
      "Requirements": [
        "DBI",
        "R6",
        "assertthat",
        "blob",
        "cli",
        "dplyr",
        "glue",
        "lifecycle",
        "magrittr",
        "pillar",
        "purrr",
        "rlang",
        "tibble",
        "tidyselect",
        "vctrs",
        "withr"
      ]
    },
    "digest": {
      "Package": "digest",
      "Version": "0.6.31",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "8b708f296afd9ae69f450f9640be8990",
      "Requirements": []
    },
    "dplyr": {
      "Package": "dplyr",
      "Version": "1.0.10",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "539412282059f7f0c07295723d23f987",
      "Requirements": [
        "R6",
        "generics",
        "glue",
        "lifecycle",
        "magrittr",
        "pillar",
        "rlang",
        "tibble",
        "tidyselect",
        "vctrs"
      ]
    },
    "dtplyr": {
      "Package": "dtplyr",
      "Version": "1.2.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "c5f8828a0b459a703db190b001ad4818",
      "Requirements": [
        "crayon",
        "data.table",
        "dplyr",
        "ellipsis",
        "glue",
        "lifecycle",
        "rlang",
        "tibble",
        "tidyselect",
        "vctrs"
      ]
    },
    "ellipsis": {
      "Package": "ellipsis",
      "Version": "0.3.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "bb0eec2fe32e88d9e2836c2f73ea2077",
      "Requirements": [
        "rlang"
      ]
    },
    "evaluate": {
      "Package": "evaluate",
      "Version": "0.19",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "5aac3cd0a3ccb1a738941796b28c26fe",
      "Requirements": []
    },
    "fansi": {
      "Package": "fansi",
      "Version": "1.0.3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "83a8afdbe71839506baa9f90eebad7ec",
      "Requirements": []
    },
    "farver": {
      "Package": "farver",
      "Version": "2.1.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "8106d78941f34855c440ddb946b8f7a5",
      "Requirements": []
    },
    "fastmap": {
      "Package": "fastmap",
      "Version": "1.1.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "77bd60a6157420d4ffa93b27cf6a58b8",
      "Requirements": []
    },
    "forcats": {
      "Package": "forcats",
      "Version": "0.5.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "9d95bc88206321cd1bc98480ecfd74bb",
      "Requirements": [
        "cli",
        "ellipsis",
        "glue",
        "lifecycle",
        "magrittr",
        "rlang",
        "tibble",
        "withr"
      ]
    },
    "fs": {
      "Package": "fs",
      "Version": "1.5.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "7c89603d81793f0d5486d91ab1fc6f1d",
      "Requirements": []
    },
    "gargle": {
      "Package": "gargle",
      "Version": "1.2.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "cca71329ad88e21267f09255d3f008c2",
      "Requirements": [
        "cli",
        "fs",
        "glue",
        "httr",
        "jsonlite",
        "rappdirs",
        "rlang",
        "rstudioapi",
        "withr"
      ]
    },
    "generics": {
      "Package": "generics",
      "Version": "0.1.3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "15e9634c0fcd294799e9b2e929ed1b86",
      "Requirements": []
    },
    "ggplot2": {
      "Package": "ggplot2",
      "Version": "3.4.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "fd2aab12f54400c6bca43687231e246b",
      "Requirements": [
        "MASS",
        "cli",
        "glue",
        "gtable",
        "isoband",
        "lifecycle",
        "mgcv",
        "rlang",
        "scales",
        "tibble",
        "vctrs",
        "withr"
      ]
    },
    "glue": {
      "Package": "glue",
      "Version": "1.6.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "4f2596dfb05dac67b9dc558e5c6fba2e",
      "Requirements": []
    },
    "googledrive": {
      "Package": "googledrive",
      "Version": "2.0.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "c3a25adbbfbb03f12e6f88c5fb1f3024",
      "Requirements": [
        "cli",
        "gargle",
        "glue",
        "httr",
        "jsonlite",
        "lifecycle",
        "magrittr",
        "pillar",
        "purrr",
        "rlang",
        "tibble",
        "uuid",
        "vctrs",
        "withr"
      ]
    },
    "googlesheets4": {
      "Package": "googlesheets4",
      "Version": "1.0.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "3b449d5292327880fc6cb61d0b2e9063",
      "Requirements": [
        "cellranger",
        "cli",
        "curl",
        "gargle",
        "glue",
        "googledrive",
        "httr",
        "ids",
        "magrittr",
        "purrr",
        "rematch2",
        "rlang",
        "tibble",
        "vctrs"
      ]
    },
    "gtable": {
      "Package": "gtable",
      "Version": "0.3.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "36b4265fb818f6a342bed217549cd896",
      "Requirements": []
    },
    "haven": {
      "Package": "haven",
      "Version": "2.5.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "5b45a553fca2217a07b6f9c843304c44",
      "Requirements": [
        "cli",
        "cpp11",
        "forcats",
        "hms",
        "lifecycle",
        "readr",
        "rlang",
        "tibble",
        "tidyselect",
        "vctrs"
      ]
    },
    "here": {
      "Package": "here",
      "Version": "1.0.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "24b224366f9c2e7534d2344d10d59211",
      "Requirements": [
        "rprojroot"
      ]
    },
    "highr": {
      "Package": "highr",
      "Version": "0.10",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "06230136b2d2b9ba5805e1963fa6e890",
      "Requirements": [
        "xfun"
      ]
    },
    "hms": {
      "Package": "hms",
      "Version": "1.1.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "41100392191e1244b887878b533eea91",
      "Requirements": [
        "ellipsis",
        "lifecycle",
        "pkgconfig",
        "rlang",
        "vctrs"
      ]
    },
    "htmltools": {
      "Package": "htmltools",
      "Version": "0.5.4",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "9d27e99cc90bd701c0a7a63e5923f9b7",
      "Requirements": [
        "base64enc",
        "digest",
        "ellipsis",
        "fastmap",
        "rlang"
      ]
    },
    "httr": {
      "Package": "httr",
      "Version": "1.4.4",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "57557fac46471f0dbbf44705cc6a5c8c",
      "Requirements": [
        "R6",
        "curl",
        "jsonlite",
        "mime",
        "openssl"
      ]
    },
    "ids": {
      "Package": "ids",
      "Version": "1.0.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "99df65cfef20e525ed38c3d2577f7190",
      "Requirements": [
        "openssl",
        "uuid"
      ]
    },
    "isoband": {
      "Package": "isoband",
      "Version": "0.2.7",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "0080607b4a1a7b28979aecef976d8bc2",
      "Requirements": []
    },
    "janitor": {
      "Package": "janitor",
      "Version": "2.1.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "6de84a8c67fb247e721166049c84695f",
      "Requirements": [
        "dplyr",
        "lifecycle",
        "lubridate",
        "magrittr",
        "purrr",
        "rlang",
        "snakecase",
        "stringi",
        "stringr",
        "tidyr",
        "tidyselect"
      ]
    },
    "jquerylib": {
      "Package": "jquerylib",
      "Version": "0.1.4",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "5aab57a3bd297eee1c1d862735972182",
      "Requirements": [
        "htmltools"
      ]
    },
    "jsonlite": {
      "Package": "jsonlite",
      "Version": "1.8.4",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "a4269a09a9b865579b2635c77e572374",
      "Requirements": []
    },
    "knitr": {
      "Package": "knitr",
      "Version": "1.41",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "6d4971f3610e75220534a1befe81bc92",
      "Requirements": [
        "evaluate",
        "highr",
        "stringr",
        "xfun",
        "yaml"
      ]
    },
    "labeling": {
      "Package": "labeling",
      "Version": "0.4.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "3d5108641f47470611a32d0bdf357a72",
      "Requirements": []
    },
    "lattice": {
      "Package": "lattice",
      "Version": "0.20-45",
      "Source": "Repository",
      "Repository": "CRAN",
      "Hash": "b64cdbb2b340437c4ee047a1f4c4377b",
      "Requirements": []
    },
    "lifecycle": {
      "Package": "lifecycle",
      "Version": "1.0.3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "001cecbeac1cff9301bdc3775ee46a86",
      "Requirements": [
        "cli",
        "glue",
        "rlang"
      ]
    },
    "lubridate": {
      "Package": "lubridate",
      "Version": "1.9.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "2af4550c2f0f7fbe7cbbf3dbf4ea3902",
      "Requirements": [
        "generics",
        "timechange"
      ]
    },
    "magrittr": {
      "Package": "magrittr",
      "Version": "2.0.3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "7ce2733a9826b3aeb1775d56fd305472",
      "Requirements": []
    },
    "memoise": {
      "Package": "memoise",
      "Version": "2.0.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "e2817ccf4a065c5d9d7f2cfbe7c1d78c",
      "Requirements": [
        "cachem",
        "rlang"
      ]
    },
    "mgcv": {
      "Package": "mgcv",
      "Version": "1.8-41",
      "Source": "Repository",
      "Repository": "CRAN",
      "Hash": "6b3904f13346742caa3e82dd0303d4ad",
      "Requirements": [
        "Matrix",
        "nlme"
      ]
    },
    "mime": {
      "Package": "mime",
      "Version": "0.12",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "18e9c28c1d3ca1560ce30658b22ce104",
      "Requirements": []
    },
    "modelr": {
      "Package": "modelr",
      "Version": "0.1.10",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "bc23cda9c6a8f91dc1c10e1994494711",
      "Requirements": [
        "broom",
        "magrittr",
        "purrr",
        "rlang",
        "tibble",
        "tidyr",
        "tidyselect",
        "vctrs"
      ]
    },
    "munsell": {
      "Package": "munsell",
      "Version": "0.5.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "6dfe8bf774944bd5595785e3229d8771",
      "Requirements": [
        "colorspace"
      ]
    },
    "nlme": {
      "Package": "nlme",
      "Version": "3.1-160",
      "Source": "Repository",
      "Repository": "CRAN",
      "Hash": "02e3c6e7df163aafa8477225e6827bc5",
      "Requirements": [
        "lattice"
      ]
    },
    "openssl": {
      "Package": "openssl",
      "Version": "2.0.5",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "b04c27110bf367b4daa93f34f3d58e75",
      "Requirements": [
        "askpass"
      ]
    },
    "pillar": {
      "Package": "pillar",
      "Version": "1.8.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "f2316df30902c81729ae9de95ad5a608",
      "Requirements": [
        "cli",
        "fansi",
        "glue",
        "lifecycle",
        "rlang",
        "utf8",
        "vctrs"
      ]
    },
    "pkgconfig": {
      "Package": "pkgconfig",
      "Version": "2.0.3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "01f28d4278f15c76cddbea05899c5d6f",
      "Requirements": []
    },
    "prettyunits": {
      "Package": "prettyunits",
      "Version": "1.1.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "95ef9167b75dde9d2ccc3c7528393e7e",
      "Requirements": []
    },
    "processx": {
      "Package": "processx",
      "Version": "3.8.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "a33ee2d9bf07564efb888ad98410da84",
      "Requirements": [
        "R6",
        "ps"
      ]
    },
    "progress": {
      "Package": "progress",
      "Version": "1.2.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "14dc9f7a3c91ebb14ec5bb9208a07061",
      "Requirements": [
        "R6",
        "crayon",
        "hms",
        "prettyunits"
      ]
    },
    "ps": {
      "Package": "ps",
      "Version": "1.7.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "68dd03d98a5efd1eb3012436de45ba83",
      "Requirements": []
    },
    "purrr": {
      "Package": "purrr",
      "Version": "1.0.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "1ad491d27989ec6c26a2918ad6df116b",
      "Requirements": [
        "cli",
        "lifecycle",
        "magrittr",
        "rlang",
        "vctrs"
      ]
    },
    "rappdirs": {
      "Package": "rappdirs",
      "Version": "0.3.3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "5e3c5dc0b071b21fa128676560dbe94d",
      "Requirements": []
    },
    "readr": {
      "Package": "readr",
      "Version": "2.1.3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "2dfbfc673ccb3de3d8836b4b3bd23d14",
      "Requirements": [
        "R6",
        "cli",
        "clipr",
        "cpp11",
        "crayon",
        "hms",
        "lifecycle",
        "rlang",
        "tibble",
        "tzdb",
        "vroom"
      ]
    },
    "readxl": {
      "Package": "readxl",
      "Version": "1.4.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "5c1fbc365ac0a3fe7728ac79108b8e64",
      "Requirements": [
        "cellranger",
        "cpp11",
        "progress",
        "tibble"
      ]
    },
    "rematch": {
      "Package": "rematch",
      "Version": "1.0.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "c66b930d20bb6d858cd18e1cebcfae5c",
      "Requirements": []
    },
    "rematch2": {
      "Package": "rematch2",
      "Version": "2.1.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "76c9e04c712a05848ae7a23d2f170a40",
      "Requirements": [
        "tibble"
      ]
    },
    "renv": {
      "Package": "renv",
      "Version": "0.16.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "c9e8442ab69bc21c9697ecf856c1e6c7",
      "Requirements": []
    },
    "reprex": {
      "Package": "reprex",
      "Version": "2.0.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "d66fe009d4c20b7ab1927eb405db9ee2",
      "Requirements": [
        "callr",
        "cli",
        "clipr",
        "fs",
        "glue",
        "knitr",
        "lifecycle",
        "rlang",
        "rmarkdown",
        "rstudioapi",
        "withr"
      ]
    },
    "rlang": {
      "Package": "rlang",
      "Version": "1.0.6",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "4ed1f8336c8d52c3e750adcdc57228a7",
      "Requirements": []
    },
    "rmarkdown": {
      "Package": "rmarkdown",
      "Version": "2.19",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "4e29299e1f4c7eabb0b8365b338adf3c",
      "Requirements": [
        "bslib",
        "evaluate",
        "htmltools",
        "jquerylib",
        "jsonlite",
        "knitr",
        "stringr",
        "tinytex",
        "xfun",
        "yaml"
      ]
    },
    "rprojroot": {
      "Package": "rprojroot",
      "Version": "2.0.3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "1de7ab598047a87bba48434ba35d497d",
      "Requirements": []
    },
    "rstudioapi": {
      "Package": "rstudioapi",
      "Version": "0.14",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "690bd2acc42a9166ce34845884459320",
      "Requirements": []
    },
    "rvest": {
      "Package": "rvest",
      "Version": "1.0.3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "a4a5ac819a467808c60e36e92ddf195e",
      "Requirements": [
        "cli",
        "glue",
        "httr",
        "lifecycle",
        "magrittr",
        "rlang",
        "selectr",
        "tibble",
        "withr",
        "xml2"
      ]
    },
    "sass": {
      "Package": "sass",
      "Version": "0.4.4",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "c76cbac7ca04ce82d8c38e29729987a3",
      "Requirements": [
        "R6",
        "fs",
        "htmltools",
        "rappdirs",
        "rlang"
      ]
    },
    "scales": {
      "Package": "scales",
      "Version": "1.2.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "906cb23d2f1c5680b8ce439b44c6fa63",
      "Requirements": [
        "R6",
        "RColorBrewer",
        "farver",
        "labeling",
        "lifecycle",
        "munsell",
        "rlang",
        "viridisLite"
      ]
    },
    "selectr": {
      "Package": "selectr",
      "Version": "0.4-2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "3838071b66e0c566d55cc26bd6e27bf4",
      "Requirements": [
        "R6",
        "stringr"
      ]
    },
    "snakecase": {
      "Package": "snakecase",
      "Version": "0.11.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "4079070fc210c7901c0832a3aeab894f",
      "Requirements": [
        "stringi",
        "stringr"
      ]
    },
    "stringi": {
      "Package": "stringi",
      "Version": "1.7.8",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "a68b980681bcbc84c7a67003fa796bfb",
      "Requirements": []
    },
    "stringr": {
      "Package": "stringr",
      "Version": "1.5.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "671a4d384ae9d32fc47a14e98bfa3dc8",
      "Requirements": [
        "cli",
        "glue",
        "lifecycle",
        "magrittr",
        "rlang",
        "stringi",
        "vctrs"
      ]
    },
    "sys": {
      "Package": "sys",
      "Version": "3.4.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "34c16f1ef796057bfa06d3f4ff818a5d",
      "Requirements": []
    },
    "tibble": {
      "Package": "tibble",
      "Version": "3.1.8",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "56b6934ef0f8c68225949a8672fe1a8f",
      "Requirements": [
        "fansi",
        "lifecycle",
        "magrittr",
        "pillar",
        "pkgconfig",
        "rlang",
        "vctrs"
      ]
    },
    "tidyr": {
      "Package": "tidyr",
      "Version": "1.2.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "cdb403db0de33ccd1b6f53b83736efa8",
      "Requirements": [
        "cpp11",
        "dplyr",
        "ellipsis",
        "glue",
        "lifecycle",
        "magrittr",
        "purrr",
        "rlang",
        "tibble",
        "tidyselect",
        "vctrs"
      ]
    },
    "tidyselect": {
      "Package": "tidyselect",
      "Version": "1.2.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "79540e5fcd9e0435af547d885f184fd5",
      "Requirements": [
        "cli",
        "glue",
        "lifecycle",
        "rlang",
        "vctrs",
        "withr"
      ]
    },
    "tidyverse": {
      "Package": "tidyverse",
      "Version": "1.3.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "972389aea7fa1a34739054a810d0c6f6",
      "Requirements": [
        "broom",
        "cli",
        "crayon",
        "dbplyr",
        "dplyr",
        "dtplyr",
        "forcats",
        "ggplot2",
        "googledrive",
        "googlesheets4",
        "haven",
        "hms",
        "httr",
        "jsonlite",
        "lubridate",
        "magrittr",
        "modelr",
        "pillar",
        "purrr",
        "readr",
        "readxl",
        "reprex",
        "rlang",
        "rstudioapi",
        "rvest",
        "stringr",
        "tibble",
        "tidyr",
        "xml2"
      ]
    },
    "timechange": {
      "Package": "timechange",
      "Version": "0.1.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "4657195cc632097bb8d140d626b519fb",
      "Requirements": [
        "cpp11"
      ]
    },
    "tinytex": {
      "Package": "tinytex",
      "Version": "0.43",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "facc02f3d63ed7dd765513c004c394ce",
      "Requirements": [
        "xfun"
      ]
    },
    "tzdb": {
      "Package": "tzdb",
      "Version": "0.3.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "b2e1cbce7c903eaf23ec05c58e59fb5e",
      "Requirements": [
        "cpp11"
      ]
    },
    "utf8": {
      "Package": "utf8",
      "Version": "1.2.2",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "c9c462b759a5cc844ae25b5942654d13",
      "Requirements": []
    },
    "uuid": {
      "Package": "uuid",
      "Version": "1.1-0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "f1cb46c157d080b729159d407be83496",
      "Requirements": []
    },
    "vctrs": {
      "Package": "vctrs",
      "Version": "0.5.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "970324f6572b4fd81db507b5d4062cb0",
      "Requirements": [
        "cli",
        "glue",
        "lifecycle",
        "rlang"
      ]
    },
    "viridisLite": {
      "Package": "viridisLite",
      "Version": "0.4.1",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "62f4b5da3e08d8e5bcba6cac15603f70",
      "Requirements": []
    },
    "vroom": {
      "Package": "vroom",
      "Version": "1.6.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "64f81fdead6e0d250fb041e175d123ab",
      "Requirements": [
        "bit64",
        "cli",
        "cpp11",
        "crayon",
        "glue",
        "hms",
        "lifecycle",
        "progress",
        "rlang",
        "tibble",
        "tidyselect",
        "tzdb",
        "vctrs",
        "withr"
      ]
    },
    "withr": {
      "Package": "withr",
      "Version": "2.5.0",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "c0e49a9760983e81e55cdd9be92e7182",
      "Requirements": []
    },
    "xfun": {
      "Package": "xfun",
      "Version": "0.36",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "f5baec54606751aa53ac9c0e05848ed6",
      "Requirements": []
    },
    "xml2": {
      "Package": "xml2",
      "Version": "1.3.3",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "40682ed6a969ea5abfd351eb67833adc",
      "Requirements": []
    },
    "yaml": {
      "Package": "yaml",
      "Version": "2.3.6",
      "Source": "Repository",
      "Repository": "RSPM",
      "Hash": "9b570515751dcbae610f29885e025b41",
      "Requirements": []
    }
  }
}

But Dropbox might ruin…

(Advanced) renv with Cloud Storage

(Advanced) Docker


renv can only solve problems caused by package versions. Other problems may come from differences in

  • R versions ⇒ Always use the latest version of R
  • Non-R dependencies (e.g., geospatial packages) ⇒ Docker can solve
  • OS (only Windows binary produces bugs…) ⇒ Docker can solve


Docker

  • A virtual machine. Write a blueprint (Dockerfile) including information of OS (Linux), Application (R and others), and Packages
  • If you work on Docker, others can perfectly replicate your environment

Handson 1


  1. Clone (or download) the course repository
  2. Open the course project (workshop-r-2022.Rproj)
  3. Run renv::restore() in R console
  4. Confirm you can render sample notebooks without any problem


Warning

Please make sure that you are using the latest R version, 4.2.2 (2022-10-31).

Cleaning Strategy

Fundamental Theorem of Readability


Fundamental Theorem of Readability

Code should be written to minimize the time it would take for someone else to understand it.


\[ \text{Code} := \arg\min_{c \in \mathcal{C}}\mathbb{E}_i[R_{i}(c)] \]

where

  • \(\mathcal{C}\): Set of codes that work
  • \(i\): A potential reader including yourself at a different time point
  • \(R_{i}(c)\): Time taken by person \(i\) to understand code \(c\)

Naming


For readability, you need to name variables informatively and non-misleadingly

🙆 Good 🙅 Bad
Bool is_female, has_kids female, no_kids
Category industry8, emp3 industry, emp_status
Bins age_bin5, wage_bin10 age, wage

Naming


For readability, you need to name variables informatively and non-misleadingly

🙆 Good 🙅 Bad
Bool is_female, has_kids female, no_kids
Category industry8, emp3 industry, emp_status
Bins age_bin5, wage_bin10 age, wage


Boolean

  • is_*, has_*, should_* indicates the type boolean.
  • Starting with not_*/no_* increases a step of recognition

Naming


For readability, you need to name variables informatively and non-misleadingly

🙆 Good 🙅 Bad
Bool is_female, has_kids female, no_kids
Category industry8, emp3 industry, emp_status
Bins age_bin5, wage_bin10 age, wage


Categorical

  • The attached number indicates that the variable is categorical and how many categories it has

Naming


For readability, you need to name variables informatively and non-misleadingly

🙆 Good 🙅 Bad
Bool is_female, has_kids female, no_kids
Category industry8, emp3 industry, emp_status
Bins age_bin5, wage_bin10 age, wage


Bins of continuous variables

  • Need to avoid the confusion with its continuous variable
  • Attached number shows the width of the bin

Rename at Once

raw <- read_delim(here("data/raw/accident_bike/year=2022/file.txt"),
        delim = ";", show_col_types = FALSE)
Rows: 42,547
Columns: 5
$ num_expediente <dbl> 2.022e+04, 2.022e+04, 2.022e+05, 2.022e+05, 2.022e+05, …
$ fecha          <chr> "01/01/2022", "01/01/2022", "01/01/2022", "01/01/2022",…
$ hora           <time> 01:30:00, 01:30:00, 00:30:00, 00:30:00, 00:30:00, 01:5…
$ localizacion   <chr> "AVDA. ALBUFERA, 19", "AVDA. ALBUFERA, 19", "PLAZA. CAN…
$ numero         <chr> "19", "19", "2", "2", "2", "53", "53", "728", "728", "+…


# Correspondence table with the raw Spanish column names (`spanish`) and
# their English replacements (`english`).
code <- read_csv(here("data/translate/accident_bike.csv"),
                 show_col_types = FALSE)

# rename() takes `new = old` pairs, so build c(english = "spanish").
# all_of() turns a listed column that is missing from `raw` into an error.
# This replaces the superseded rename_at().
renamed <- raw |>
  rename(all_of(setNames(code$spanish, code$english)))
Rows: 42,547
Columns: 5
$ id_1922    <dbl> 2.022e+04, 2.022e+04, 2.022e+05, 2.022e+05, 2.022e+05, 2.02…
$ date       <chr> "01/01/2022", "01/01/2022", "01/01/2022", "01/01/2022", "01…
$ hms        <time> 01:30:00, 01:30:00, 00:30:00, 00:30:00, 00:30:00, 01:50:00…
$ street     <chr> "AVDA. ALBUFERA, 19", "AVDA. ALBUFERA, 19", "PLAZA. CANOVAS…
$ num_street <chr> "19", "19", "2", "2", "2", "53", "53", "728", "728", "+0050…
spanish english
num_expediente id_1922
fecha date
hora hms
localizacion street
numero num_street
cod_distrito code_district
distrito district
tipo_accidente type_accident
estado_meteorológico weather
tipo_vehiculo type_vehicle
tipo_persona type_person
rango_edad age_c
sexo gender
cod_lesividad code_injury8
lesividad injury8
coordenada_x_utm coord_x
coordenada_y_utm coord_y
positiva_alcohol positive_alcohol
positiva_droga positive_drug

Type: Date & Time


lubridate provides powerful date-parsing functions.


lubridate::ymd("2021/08/31")
[1] "2021-08-31"
lubridate::mdy("Sep. 10, 19")
[1] "2019-09-10"
lubridate::dmy_hm("02/04/1999 16:00", tz="America/New_York")
[1] "1999-04-02 16:00:00 EST"

renamed |> select(date, hms) |> head()
# A tibble: 6 × 2
  date       hms   
  <chr>      <time>
1 01/01/2022 01:30 
2 01/01/2022 01:30 
3 01/01/2022 00:30 
4 01/01/2022 00:30 
5 01/01/2022 00:30 
6 01/01/2022 01:50 


renamed |>
  # Join date and time with an explicit space so dmy_hms() receives
  # e.g. "01/01/2022 01:30:00" instead of the fused "01/01/202201:30:00".
  mutate(time = lubridate::dmy_hms(str_c(date, hms, sep = " "),
                                   tz = "Europe/Madrid")) |>
  select(date, hms, time) |>
  head()
# A tibble: 6 × 3
  date       hms    time               
  <chr>      <time> <dttm>             
1 01/01/2022 01:30  2022-01-01 01:30:00
2 01/01/2022 01:30  2022-01-01 01:30:00
3 01/01/2022 00:30  2022-01-01 00:30:00
4 01/01/2022 00:30  2022-01-01 00:30:00
5 01/01/2022 00:30  2022-01-01 00:30:00
6 01/01/2022 01:50  2022-01-01 01:50:00

Type: Categorical Variables


renamed |>
  mutate(
    type_person = recode_factor(type_person,
        "Conductor" = "Driver",
        "Pasajero" = "Passenger",
        "Peatón" = "Pedestrian",
        "NULL"= NULL)) |>
  janitor::tabyl(type_person)
 type_person     n    percent
      Driver 34567 0.81244271
   Passenger  6503 0.15284274
  Pedestrian  1477 0.03471455

recode_factor() finishes:

  1. Define as factor variables
  2. Order factor variable
  3. Rename & Translate (labels in plots & tables)
  4. Handle NA values (Next Slides)

Handle NA Values

Some datasets encode NA values as strings

unique(renamed$weather) # "Se desconoce" is also essentially NA
[1] "Despejado"      "NULL"           "Se desconoce"   "Lluvia débil"  
[5] "Nublado"        "LLuvia intensa" "Granizando"     "Nevando"       


Solution 1: Define NA values when you load

sol1 <- read_delim(here("data/raw/accident_bike/year=2019/file.txt"),
                 delim = ";", show_col_types = FALSE,
                 na = c("", "NA", "NULL", "Se desconoce", "Desconocido")) |>
        rename(weather = "estado_meteorológico")

unique(sol1$weather)
[1] "Despejado"      NA               "Lluvia débil"   "Nublado"       
[5] "LLuvia intensa" "Granizando"     "Nevando"       


Cannot be used when specific numbers represent NA values (9, 99, …)

Solution 2: na_if()

renamed |>
  mutate(
    weather_old = weather,# Presentation Purpose
    weather = na_if(weather, "Se desconoce"),
    weather = na_if(weather, "NULL"),
    ) |>
  select(weather_old, weather) |>
  head()
# A tibble: 6 × 2
  weather_old weather  
  <chr>       <chr>    
1 Despejado   Despejado
2 Despejado   Despejado
3 NULL        <NA>     
4 NULL        <NA>     
5 NULL        <NA>     
6 Despejado   Despejado


Works for any case. But need to write for each NA value.

Solution 3: Recode as NULL

renamed |>
  mutate(
    weather_spanish = weather,# Presentation Purpose
    weather = recode_factor(weather,
        "Despejado" = "sunny",
        "Nublado" = "cloud",
        "Lluvia débil" = "soft rain",
        "Lluvia intensa" = "hard rain",
        "LLuvia intensa" = "hard rain",
        "Nevando" = "snow",
        "Granizando" = "hail",
        "Se desconoce" = NULL,
        "NULL" = NULL)) |>
  select(weather_spanish, weather) |>
  head()
# A tibble: 6 × 2
  weather_spanish weather
  <chr>           <fct>  
1 Despejado       sunny  
2 Despejado       sunny  
3 NULL            <NA>   
4 NULL            <NA>   
5 NULL            <NA>   
6 Despejado       sunny  


Only works for categorical variables. But practically useful.

Format: Parquet

(Advanced) Parquet with Large Dataset

Cleaning Workflow

1. Naming

  • Put informative and non-misleading names
  • If necessary, translate the variable names
  • You can use a correspondence table and rename variables at once

2. Determine Types

  • Date: lubridate parsing functions
  • Categorical: recode_factor()
  • NA-values: na_if() and recode_factor()

3. Export

  • Parquet format

Handson 2

Some examples of cleaning

Tips in Plots

Data-ink Ratio


Data-ink Ratio Principle

Maximize the data-ink ratio in a plot:

\[ \text{Data-ink ratio} := \frac{\text{Data-ink}}{\text{Total ink used to print in the graphic}} \]


Corollary

Omit all the portions of a graphic that can be erased without losing information

Maximize Data-ink Ratio


accident_bike |>
  ggplot(aes(x = type_person, fill = gender)) +
  geom_bar(position = "dodge")

Maximize Data-ink Ratio


accident_bike |>
  ggplot(aes(x = type_person, fill = gender)) +
  geom_bar(position = "dodge") +
  labs(x = NULL, y = NULL, fill = NULL) +
  theme_minimal() +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.x = element_blank())

Number of Persons Hospitalized

  • Omit axis label. The title of the plot can tell them
  • Omit legend label. The label “gender” does not add any information
  • Omit background grids

More Readability: Order Bar Plot


accident_bike |>
  ggplot(aes(x = fct_rev(type_person),
         fill = fct_rev(gender))) +
  geom_bar(position = "dodge") +
  coord_flip() +
  labs(x = NULL, y = NULL, fill = NULL) +
  theme_minimal() +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.position = c(0.9, 0.1)) +
  guides(fill = guide_legend(reverse = TRUE))

Number of Persons Hospitalized

  • Coord flipped. Reorder the factor variables
  • Put legends inside the plot to make the plot bigger

More Readability: Increase Font Size


accident_bike |>
  ggplot(aes(x = fct_rev(type_person),
         fill = fct_rev(gender))) +
  geom_bar(position = "dodge") +
  coord_flip() +
  labs(x = NULL, y = NULL, fill = NULL) +
  theme_minimal() +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.position = c(0.9, 0.1),
        axis.text.x = element_text(size = 20),
        axis.text.y = element_text(size = 25),
        legend.text = element_text(size = 20)) +
  guides(fill = guide_legend(reverse = TRUE))

Number of Persons Hospitalized

R Color Brewer’s Palettes

R Color Brewer’s Palettes


accident_bike |>
  ggplot(aes(x = fct_rev(type_person),
         fill = fct_rev(gender))) +
  geom_bar(position = "dodge") +
  coord_flip() +
  labs(x = NULL, y = NULL, fill = NULL) +
  scale_fill_brewer(palette = "Accent") +
  theme_minimal() +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.position = c(0.9, 0.1),
        axis.text.x = element_text(size = 20),
        axis.text.y = element_text(size = 25),
        legend.text = element_text(size = 20)) +
  guides(fill = guide_legend(reverse = TRUE))

Number of Persons Hospitalized

Color-Safe Palette: Okabe-Ito Palette


accident_bike |>
  ggplot(aes(x = fct_rev(type_person),
         fill = fct_rev(gender))) +
  geom_bar(position = "dodge") +
  coord_flip() +
  labs(x = NULL, y = NULL, fill = NULL) +
  see::scale_fill_okabeito() +
  theme_minimal() +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.position = c(0.9, 0.1),
        axis.text.x = element_text(size = 20),
        axis.text.y = element_text(size = 25),
        legend.text = element_text(size = 20)) +
  guides(fill = guide_legend(reverse = TRUE))

Number of Persons Hospitalized

Custom Palette


accident_bike |>
  ggplot(aes(x = fct_rev(type_person),
         fill = fct_rev(gender))) +
  geom_bar(position = "dodge") +
  coord_flip() +
  labs(x = NULL, y = NULL, fill = NULL) +
  scale_fill_manual(values = c("#E7B800", "#00AFBB")) +
  theme_minimal() +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.position = c(0.9, 0.1),
        axis.text.x = element_text(size = 20),
        axis.text.y = element_text(size = 25),
        legend.text = element_text(size = 20)) +
  guides(fill = guide_legend(reverse = TRUE))

Number of Persons Hospitalized

Fonts

Google Fonts

  • You can download well-designed free fonts
  • My recommendation: Condensed fonts

Roboto Condensed, Fira Sans Condensed, IBM Plex Sans Condensed,…


showtext

  • Your collaborators need to download the fonts
  • font_add_google() and showtext_auto() automatically solve the problem


Roboto Condensed


library(showtext)

# Family names used in element_text(family = ...) in the plot below.
font_base  <- "Roboto Condensed"
font_light <- "Roboto Condensed Light 300"

# font_add_google(name, family, regular.wt, ...): `name` is the Google Fonts
# name, `family` is the name R will know the font by. Register each family
# explicitly. Passing font_light as the second positional argument would
# register the regular (400) weight under the light family's name and leave
# font_base unregistered.
font_add_google("Roboto Condensed", family = font_base)
font_add_google("Roboto Condensed", family = font_light, regular.wt = 300)
showtext_auto()

accident_bike |>
  ggplot(aes(x = fct_rev(type_person), fill = fct_rev(gender))) +
  geom_bar(position = "dodge") +
  coord_flip() +
  labs(x = NULL, y = NULL, fill = NULL) +
  see::scale_fill_okabeito() +
  theme_minimal() +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.position = c(0.9, 0.1),
        axis.text.x = element_text(size = 20, family = font_light),
        axis.text.y = element_text(size = 25, family = font_base),
        legend.text = element_text(size = 20, family = font_light)) +
  guides(fill = guide_legend(reverse = TRUE))

Number of Persons Hospitalized

Global Options


Don’t worry. You can set the default theme before plotting.


theme_set(theme_minimal(base_size = 12, base_family = "Roboto Condensed"))
theme_update(
  axis.ticks = element_line(color = "grey92"),
  axis.ticks.length = unit(.5, "lines"),
  panel.grid.minor = element_blank(),
  legend.title = element_text(size = 12),
  legend.text = element_text(color = "grey30"),
  plot.title = element_text(size = 18, face = "bold"),
  plot.subtitle = element_text(size = 12, color = "grey30"),
  plot.caption = element_text(size = 9, margin = margin(t = 15))
)


Alternatively, create a custom theme and color palette (Andrew Heiss’s code)

Third-party Themes: hrbrthemes


accident_bike |>
  ggplot(aes(x = fct_rev(type_person),
         fill = fct_rev(gender))) +
  geom_bar(position = "dodge") +
  coord_flip() +
  labs(x = NULL, y = NULL, fill = NULL) +
  hrbrthemes::scale_fill_ipsum() +
  hrbrthemes::theme_ipsum_rc() +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.position = c(0.9, 0.1),
        axis.text.x = element_text(size = 20),
        axis.text.y = element_text(size = 25),
        legend.text = element_text(size = 20)) +
  guides(fill = guide_legend(reverse = TRUE))

Number of Persons Hospitalized

Third-party Themes: ggpubr & ggsci Palette


p <- accident_bike |>
  ggplot(aes(x = fct_rev(type_person),
         fill = fct_rev(gender))) +
  geom_bar(position = "dodge") +
  coord_flip() +
  labs(x = NULL, y = NULL, fill = NULL) +
  ggpubr::theme_pubr() +
  theme(panel.grid.minor = element_blank(),
        panel.grid.major.y = element_blank(),
        legend.position = c(0.9, 0.1),
        axis.text.x = element_text(size = 20),
        axis.text.y = element_text(size = 25),
        legend.text = element_text(size = 20)) +
  guides(fill = guide_legend(reverse = TRUE))

ggpubr::set_palette(p, "jco") # choose one of ggsci palette

Number of Persons Hospitalized

Patchwork

library(patchwork)

(p_default + p_custom) / (p_hrbrthemes + p_ggpubr)

Takeaway

Maximize Data-ink Ratio

  • Omit all the unnecessary elements in a plot

Colors & Fonts

  • Color Palette: RColorBrewer, Okabe-Ito, ggsci
  • Fonts: Google Fonts. Especially, condensed fonts.
  • Ready-made Themes: hrbrthemes, ggpubr

Further Readings

Automated Table Creation

kableExtra: Example

tab
# A tibble: 6 × 9
# Groups:   weather [6]
  weather   n_Men_2019 n_Men_2…¹ n_Men…² n_Men…³ n_Wom…⁴ n_Wom…⁵ n_Wom…⁶ n_Wom…⁷
  <fct>          <int>     <int>   <int>   <int>   <int>   <int>   <int>   <int>
1 sunny          24399     14969   19208   19420   11971    6958    9417    9298
2 cloud           1159      1190    1325    1633     555     554     630     774
3 soft rain       2126      1198    1281    1408    1068     542     605     716
4 hard rain        386       202     386     352     222      96     210     179
5 snow               2         2     124       5      NA      NA      38       1
6 hail              11         5       6       4       3       3       1       2
# … with abbreviated variable names ¹​n_Men_2020, ²​n_Men_2021, ³​n_Men_2022,
#   ⁴​n_Women_2019, ⁵​n_Women_2020, ⁶​n_Women_2021, ⁷​n_Women_2022
library(kableExtra)

# Print missing cells as blanks instead of "NA".
options(knitr.kable.NA = "")

# LaTeX table: blank header for the first column, then the four years
# repeated once for each of the two column groups.
ktb <- kbl(
  tab,
  format = "latex",
  booktabs = TRUE,
  col.names = c(" ", rep(2019:2022, times = 2))
)

# Spanning header: 4 columns under "Men", 4 under "Women".
ktb <- add_header_above(ktb, c(" ", "Men" = 4, "Women" = 4))

# Row groups: the first 2 rows are labelled "Good", the next 4 "Bad".
ktb <- pack_rows(ktb, index = c("Good" = 2, "Bad" = 4))

save_kable(ktb, here("output/tex/kableextra/tb_accident_bike.tex"))

  • booktabs = TRUE for booktabs package in LaTeX
  • You can specify the column names by col.names
  • You can pack columns and rows by add_header_above() and pack_rows()
  • save_kable() saves in a tex file if the file name ends with “.tex”

kableExtra

Dataframe (tibble) to Table

  • Create a tibble table by dplyr::group_by & dplyr::summarize and janitor::tabyl()
  • For regression tables, you can use modelsummary (next slide)

Pack Columns and Rows

  • As far as I know, Python, Julia, and Stata do not allow us to pack them easily

More Complicated Tables

  • You can refer to Hao Zhu’s document
  • If a table contains a mathematical expression, use escape=FALSE. See a discussion on Stack Overflow

modelsummary

Given the following regression results,


library(fixest) # for faster regression with fixed effect

# Six logit models sharing the same regressors. (1)-(3) use is_hospitalized
# as the outcome, (4)-(6) use is_died. Within each outcome the fixed effects
# (right of `|`) grow from age_c + gender, to + type_vehicle, to + weather.
models <- list(
  "(1)" = feglm(
    is_hospitalized ~ type_person + positive_alcohol + positive_drug |
      age_c + gender,
    family = binomial(logit), data = data
  ),
  "(2)" = feglm(
    is_hospitalized ~ type_person + positive_alcohol + positive_drug |
      age_c + gender + type_vehicle,
    family = binomial(logit), data = data
  ),
  "(3)" = feglm(
    is_hospitalized ~ type_person + positive_alcohol + positive_drug |
      age_c + gender + type_vehicle + weather,
    family = binomial(logit), data = data
  ),
  "(4)" = feglm(
    is_died ~ type_person + positive_alcohol + positive_drug |
      age_c + gender,
    family = binomial(logit), data = data
  ),
  "(5)" = feglm(
    is_died ~ type_person + positive_alcohol + positive_drug |
      age_c + gender + type_vehicle,
    family = binomial(logit), data = data
  ),
  "(6)" = feglm(
    is_died ~ type_person + positive_alcohol + positive_drug |
      age_c + gender + type_vehicle + weather,
    family = binomial(logit), data = data
  )
)

modelsummary: Init


modelsummary(models)
(1) (2) (3) (4) (5) (6)
type_personPassenger 0.049 0.530 0.507 −1.781 −1.575 −1.565
(0.104) (0.071) (0.070) (0.759) (0.783) (0.784)
type_personPedestrian 2.124 2.402 2.323 2.280 2.418 2.422
(0.115) (0.066) (0.064) (0.301) (0.287) (0.285)
positive_alcoholTRUE −0.077 0.310 0.353 −13.710 −13.455 −13.492
(0.088) (0.095) (0.093) (0.053) (0.064) (0.063)
Num.Obs. 149918 149831 134006 90852 89300 86330
R2 0.055 0.171 0.165 0.107 0.145 0.148
R2 Adj. 0.054 0.170 0.163 0.086 0.113 0.112
R2 Within 0.047 0.054 0.052 0.073 0.076 0.076
R2 Within Adj. 0.047 0.054 0.052 0.070 0.072 0.073
AIC 62871.0 55210.6 53565.4 1601.9 1552.2 1534.5
BIC 63079.3 55696.5 54085.1 1780.8 1824.8 1834.2
RMSE 0.23 0.22 0.23 0.04 0.04 0.04
Std.Errors by: age_c by: age_c by: age_c by: age_c by: age_c by: age_c
FE: age_c X X X X X X
FE: gender X X X X X X
FE: type_vehicle X X X X
FE: weather X X

modelsummary: Modify Coefficients


# coef_map keys must match the model term names exactly (case-sensitive).
# The terms are "type_personPassenger" / "type_personPedestrian" (capital P),
# so lowercase keys never match and those rows are dropped from the table.
# Only the terms listed here are kept, in this order.
cm <- c(
  "type_personPassenger" = "Passenger",
  "type_personPedestrian" = "Pedestrian",
  "positive_alcoholTRUE" = "Positive Alcohol"
)

modelsummary(models, coef_map = cm)
(1) (2) (3) (4) (5) (6)
Positive Alcohol −0.077 0.310 0.353 −13.710 −13.455 −13.492
(0.088) (0.095) (0.093) (0.053) (0.064) (0.063)
Num.Obs. 149918 149831 134006 90852 89300 86330
R2 0.055 0.171 0.165 0.107 0.145 0.148
R2 Adj. 0.054 0.170 0.163 0.086 0.113 0.112
R2 Within 0.047 0.054 0.052 0.073 0.076 0.076
R2 Within Adj. 0.047 0.054 0.052 0.070 0.072 0.073
AIC 62871.0 55210.6 53565.4 1601.9 1552.2 1534.5
BIC 63079.3 55696.5 54085.1 1780.8 1824.8 1834.2
RMSE 0.23 0.22 0.23 0.04 0.04 0.04
Std.Errors by: age_c by: age_c by: age_c by: age_c by: age_c by: age_c
FE: age_c X X X X X X
FE: gender X X X X X X
FE: type_vehicle X X X X
FE: weather X X

modelsummary: Modify Statistics


# coef_map keys must match the model term names exactly (case-sensitive).
# The terms are "type_personPassenger" / "type_personPedestrian" (capital P),
# so lowercase keys never match and those rows are dropped from the table.
cm <- c(
  "type_personPassenger" = "Passenger",
  "type_personPedestrian" = "Pedestrian",
  "positive_alcoholTRUE" = "Positive Alcohol"
)

# Goodness-of-fit rows to keep: `raw` is the statistic's name in the default
# table, `clean` is the label to print, `fmt` is the number of decimals.
gm <- tibble(
  raw = c("nobs", "FE: age_c", "FE: gender", "FE: type_vehicle",
          "FE: weather"),
  clean = c("Observations", "FE: Age Group", "FE: Gender",
            "FE: Type of Vehicle", "FE: Weather"),
  fmt = c(0, 0, 0, 0, 0)
)

modelsummary(models,
  coef_map = cm,
  gof_map = gm
)
(1) (2) (3) (4) (5) (6)
Positive Alcohol −0.077 0.310 0.353 −13.710 −13.455 −13.492
(0.088) (0.095) (0.093) (0.053) (0.064) (0.063)
Observations 149918 149831 134006 90852 89300 86330
FE: Age Group X X X X X X
FE: Gender X X X X X X
FE: Type of Vehicle X X X X
FE: Weather X X

modelsummary: Stars & Headers


code-line-numbers="7,16"
# coef_map keys must match the model term names exactly (case-sensitive).
# The terms are "type_personPassenger" / "type_personPedestrian" (capital P),
# so lowercase keys never match and those rows are dropped from the table.
cm <- c(
  "type_personPassenger" = "Passenger",
  "type_personPedestrian" = "Pedestrian",
  "positive_alcoholTRUE" = "Positive Alcohol"
)

# Goodness-of-fit rows to keep: `raw` is the statistic's name in the default
# table, `clean` is the label to print, `fmt` is the number of decimals.
gm <- tibble(
  raw = c("nobs", "FE: age_c", "FE: gender", "FE: type_vehicle",
          "FE: weather"),
  clean = c("Observations", "FE: Age Group", "FE: Gender",
            "FE: Type of Vehicle", "FE: Weather"),
  fmt = c(0, 0, 0, 0, 0)
)

modelsummary(models,
  # Significance markers and their p-value thresholds.
  stars = c("+" = .1, "*" = .05, "**" = .01),
  coef_map = cm,
  gof_map = gm) |>
  # Spanning header: models (1)-(3) and models (4)-(6).
  add_header_above(c(" ", "Hospitalization" = 3, "Died within 24 hours" = 3))
Hospitalization
Died within 24 hours
(1) (2) (3) (4) (5) (6)
Positive Alcohol −0.077 0.310** 0.353** −13.710** −13.455** −13.492**
(0.088) (0.095) (0.093) (0.053) (0.064) (0.063)
Observations 149918 149831 134006 90852 89300 86330
FE: Age Group X X X X X X
FE: Gender X X X X X X
FE: Type of Vehicle X X X X
FE: Weather X X
+ p < 0.1, * p < 0.05, ** p < 0.01

modelsummary: Export to \(\LaTeX\)


# coef_map keys must match the model term names exactly (case-sensitive).
# The terms are "type_personPassenger" / "type_personPedestrian" (capital P),
# so lowercase keys never match and those rows are dropped from the table.
cm <- c(
  "type_personPassenger" = "Passenger",
  "type_personPedestrian" = "Pedestrian",
  "positive_alcoholTRUE" = "Positive Alcohol"
)

# Goodness-of-fit rows to keep: `raw` is the statistic's name in the default
# table, `clean` is the label to print, `fmt` is the number of decimals.
gm <- tibble(
  raw = c("nobs", "FE: age_c", "FE: gender", "FE: type_vehicle",
          "FE: weather"),
  clean = c("Observations", "FE: Age Group", "FE: Gender",
            "FE: Type of Vehicle", "FE: Weather"),
  fmt = c(0, 0, 0, 0, 0)
)

modelsummary(models,
  output = "latex_tabular",
  stars = c("+" = .1, "*" = .05, "**" = .01),
  coef_map = cm,
  gof_map = gm) |>
  add_header_above(c(" ", "Hospitalization" = 3, "Died within 24 hours" = 3)) |>
  # NOTE(review): with three mapped coefficients (estimate + std. error rows
  # each) row 7 should be "Observations" -- confirm the rule lands there.
  # TRUE instead of T: T is an ordinary variable and can be reassigned.
  row_spec(7, hline_after = TRUE) |>
  save_kable(here("output/tex/modelsummary/reg_accident_bike.tex"))


output = "latex_tabular" produces a .tex file that contains only the tabular environment, without the enclosing table environment

Quarto

What Is Quarto?

Markdown ⇔ \(\LaTeX\)

Interactive Plots

Quarto Presentation